#### POLITICAL SCIENCE STUDIES ONLY ####
#### FIGURE G1 ####
library(tidyverse)
library(broom)
library(estimatr)
library(deming)
library(xtable)
set.seed(78712)

# function to extract ATEs
Get_ATE <- function(data, subsample = c(0,1)) {
      out <- tidy(lm_robust(formula(data$FORMULA[1]), data, 
                            subset = TNRFU %in% subsample))
      out %>% slice_tail() %>%
            mutate(StudyID = data$StudyId[1])
}

# set working directory
setwd("data/analysis/cleaned/")

# list of studies in political science
polisci <- c(1, 3, 6, 8:13, 18, 19, 21, 24, 25, 27:29, 
             31, 32, 34, 35, 39, 40, 43, 45, 46, 49, 53, 56)

##### WHOLE SAMPLE #####
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

all_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

all_se_int <- sd(all_se_ints)

## Estimate Deming
dem_all <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = F)

# put bootstrapped intercepts and betas into a data frame
boots <- tibble(alpha = all_se_ints, beta = all_se_slopes)

## create band
dem_dat <- dem_dat |> mutate(prediction = dem_all$coefficients["(Intercept)"] + estimate.e * dem_all$coefficients["estimate.e"],
                             ## create the upper bound
                             upper_band = prediction + 1.96 * sqrt(var(boots$alpha) + estimate.e^2 * var(boots$beta) - 2 * estimate.e * cov(boots$alpha, boots$beta)),
                             ## create the lower bound
                             lower_band = prediction - 1.96 * sqrt(var(boots$alpha) + estimate.e^2 * var(boots$beta) - 2 * estimate.e * cov(boots$alpha, boots$beta))
)

## Plot FIGURE G1 ##
ggplot(dem_dat, aes(x = estimate.e, y = estimate.r)) + 
      geom_point() + 
      geom_ribbon(aes(ymin = lower_band, ymax = upper_band), 
                  fill = "gray70", alpha = 0.5) +
      geom_errorbar(aes(xmin = conf.low.e, xmax = conf.high.e), alpha = .25) + 
      geom_errorbar(aes(ymin = conf.low.r, ymax = conf.high.r), alpha = .25) + 
      geom_hline(yintercept = 0, alpha = .5, linetype = 2) + 
      geom_vline(xintercept = 0, alpha = .5, linetype = 2) + 
      geom_abline(slope = 1, intercept = 0, alpha = .5, linetype = 2) +
      geom_abline(intercept = dem_all$coefficients[1], slope = dem_all$coefficients[2], alpha = .8) +
      xlab("Standardized Effect among Eager Respondents") + 
      ylab("Standardized Effect among Reluctant Respondents") +
      coord_cartesian(xlim =c(-.92, .92), ylim = c(-.92, .92)) +
      theme_classic() +
      theme(legend.position = c(.85, .3),
            legend.title = element_text(size=14),
            legend.text = element_text(size=14),
            axis.text = element_text(size=14),
            axis.title = element_text(size=15),
            axis.ticks.length = unit(2, "mm")) 

# save
ggsave("../../../results/Figure_G1.pdf")



#### FIGURE G2 ####

##### GENDER #####
## Men ##
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs
all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

men_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

men_se_int <- sd(all_se_ints)

# Deming regression
dem_men <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)


## Women ##
# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, GENDER == 2)
      
      if(nrow(d)==0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs
all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

women_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

women_se_int <- sd(all_se_ints)

# Deming regression
dem_women <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                    data = dem_dat, jackknife = T)

##### AGE (3 cat.) #####
## 18- to 39-year-olds ##
# set working directory

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "18-39")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age1_se_int <- sd(all_se_ints)

# Deming regression
dem_age1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## 40- to 59-year-olds ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "40-59")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age2_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age2_se_int <- sd(all_se_ints)

# Deming regression
dem_age2 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## 60+-year-olds ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, AGE3 = case_when(
            between(AGE, 18, 39) ~ "18-39",
            between(AGE, 40, 59) ~ "40-59",
            between(AGE, 60, 150) ~ "60+"
      ))
      d <- filter(d, AGE3 == "60+")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

age3_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

age3_se_int <- sd(all_se_ints)

# Deming regression
dem_age3 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


#### Party ID (3 cat.) ####
### Democrats ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Democrat")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

dems_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

dems_se_int <- sd(all_se_ints)

# Deming regression
dem_dems <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


### Independents ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Independent")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

ind_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

ind_se_int <- sd(all_se_ints)

# Deming regression
dem_ind <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)


### Republicans ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, pid3 = case_when(
            between(PartyID7, 1, 3) ~ "Democrat",
            between(PartyID7, 4, 4) ~ "Independent",
            between(PartyID7, 5, 7) ~ "Republican",
            TRUE ~ NA_character_))
      d <- filter(d, pid3 == "Republican")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

rep_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

rep_se_int <- sd(all_se_ints)

# Deming regression
dem_rep <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                  data = dem_dat, jackknife = T)


#### EDUCATION (3 cat.) ####

### HS Degree or less ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "1No HS degree" | EDUC4 == "2HS degree")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

nohs_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

nohs_se_int <- sd(all_se_ints)

# Deming regression
dem_nohs <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


### Some college ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "3Some college")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

somecoll_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

somecoll_se_int <- sd(all_se_ints)

# Deming regression
dem_somecoll <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


### BA or higher ###

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, EDUC4 = case_when(
            EDUC < 9 ~ "1No HS degree",
            EDUC == 9 ~ "2HS degree", 
            between(EDUC, 10, 11) ~ "3Some college",
            EDUC > 11 ~ "4BA or higher"))
      
      d <- filter(d, EDUC4 == "4BA or higher")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

ba_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

ba_se_int <- sd(all_se_ints)

# Deming regression
dem_ba <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                 data = dem_dat, jackknife = T)


#### INCOME (3 cat.) #### 

## Lower Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Low Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc1_se_int <- sd(all_se_ints)

# Deming regression
dem_inc1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## Middle Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "Middle Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc2_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc2_se_int <- sd(all_se_ints)

# Deming regression
dem_inc2 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


## Upper Third of Income ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- mutate(d, INCOME3 = case_when(
            between(INCOME, 1, 8) ~ "Low Income",
            between(INCOME, 9, 12) ~ "Middle Income",
            between(INCOME, 13, 18) ~ "High Income"
      ))
      d <- filter(d, INCOME3 == "High Income")
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

inc3_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

inc3_se_int <- sd(all_se_ints)

# Deming regression
dem_inc3 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


#### RACE ####
## White ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY == 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

white_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

white_se_int <- sd(all_se_ints)

# Deming regression
dem_white <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                    data = dem_dat, jackknife = T)


## Non-white ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, RACETHNICITY != 1)
      
      if(nrow(d) == 0){next}
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

nonwhite_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

nonwhite_se_int <- sd(all_se_ints)

# Deming regression
dem_nonwhite <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


#### METRO AREA ####

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 1)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

metro1_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

metro1_se_int <- sd(all_se_ints)

# Deming regression
dem_metro1 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                     data = dem_dat, jackknife = T)


## Lives in a Non-metro Area ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, METRO == 0)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
dem_dat <- filter(dem_dat, df.r > 10) # dropping cases problematically small N

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

metro0_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

metro0_se_int <- sd(all_se_ints)

# Deming regression
dem_metro0 <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                     data = dem_dat, jackknife = T)


#### PHONE SERVICE ####

## Landline ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, PHONESERVICE == 1 | PHONESERVICE == 3)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))
dem_dat <- filter(dem_dat, df.r > 10) # dropping cases problematically small N

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

landline_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

landline_se_int <- sd(all_se_ints)

# Deming regression
dem_landline <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                       data = dem_dat, jackknife = T)


## Cellphone ##

# start by making an empty data set 
all_results <- tibble(term = NA, estimate = NA, std.error = NA, statistic = NA,
                      p.value = NA, conf.low = NA, conf.high = NA, df = NA, 
                      outcome = NA, StudyID = NA, subsample = NA)

for(i in 1:length(list.files())){
      files <- list.files()
      d <- read_csv(files[i])
      d <- filter(d, PHONESERVICE == 2 | PHONESERVICE == 4)
      
      study_results <- bind_rows(Get_ATE(d),
                                 Get_ATE(d, 0), # eager respondents
                                 Get_ATE(d, 1)) # reluctant respondents
      study_results <- add_column(study_results, subsample = c("full", "eager", "reluctant"))
      
      all_results <- add_row(all_results, study_results)
}

all_results <- all_results[-1, ] #drop first row since it's all NAs

all_results <- filter(all_results, StudyID %in% polisci)

eagers <- filter(all_results, subsample == "eager")
reluctants <- filter(all_results, subsample == "reluctant")

dem_dat <- full_join(eagers, reluctants, by = "StudyID", suffix = c(".e", ".r"))

# Creating study blocks b/c Studies c(1, 2, 5); c(33, 35, 36); c(40, 43) shared time
dem_dat <- dem_dat |> group_by(StudyID) |>
      ungroup() |>
      mutate(study.cluster = case_when(
            StudyID %in% c(1, 2, 5) ~ 66,
            StudyID %in% c(33, 35, 36) ~ 77, 
            StudyID %in% c(40, 43) ~ 88,
            TRUE ~ StudyID))

# creating block bootstrap samples
dboots <- group_bootstraps(dem_dat, "study.cluster", times = 2000)

# estimating Deming on each block bootstrapped sample
all_se_slopes <- map_dbl(dboots$splits,
                         function(x) {
                               dat <- as.data.frame(x)
                               dem_lm <- deming(estimate.r ~ estimate.e, 
                                                ystd = std.error.r, xstd = std.error.e,
                                                data = dat, jackknife = T)
                               dem_lm$coefficients[2]
                         }
)

cell_se_slope <- sd(all_se_slopes)

all_se_ints <- map_dbl(dboots$splits,
                       function(x) {
                             dat <- as.data.frame(x)
                             dem_lm <- deming(estimate.r ~ estimate.e, 
                                              ystd = std.error.r, xstd = std.error.e,
                                              data = dat, jackknife = T)
                             dem_lm$coefficients[1]
                       }
)

cell_se_int <- sd(all_se_ints)

# Deming regression
dem_cell <- deming(estimate.r ~ estimate.e, ystd = std.error.r, xstd = std.error.e,
                   data = dem_dat, jackknife = T)


##### Constructing Dataset of Coefficients #####

subgroup <- c("All", "Men", "Women", "Age: 18-39", "Age: 40-59", "Age: 60+",
              "Democrats", "Independents", "Republicans", "HS or less",
              "Some college", "College or more", "Low-Income", "Mid-Income",
              "High-Income", "Whites", "Non-Whites", "Metro", "Non-Metro", 
              "Landline", "Cellphone")

ints <- c(dem_all$coefficients[1],
          dem_men$coefficients[1],
          dem_women$coefficients[1],
          dem_age1$coefficients[1],
          dem_age2$coefficients[1],
          dem_age3$coefficients[1],
          dem_dems$coefficients[1],
          dem_ind$coefficients[1],
          dem_rep$coefficients[1],
          dem_nohs$coefficients[1],
          dem_somecoll$coefficients[1],
          dem_ba$coefficients[1],
          dem_inc1$coefficients[1],
          dem_inc2$coefficients[1],
          dem_inc3$coefficients[1],
          dem_white$coefficients[1],
          dem_nonwhite$coefficients[1],
          dem_metro1$coefficients[1],
          dem_metro0$coefficients[1],
          dem_landline$coefficients[1],
          dem_cell$coefficients[1]
)

se.ints <- c(all_se_int,
             men_se_int,
             women_se_int,
             age1_se_int,
             age2_se_int,
             age3_se_int,
             dems_se_int,
             ind_se_int,
             rep_se_int,
             nohs_se_int,
             somecoll_se_int,
             ba_se_int,
             inc1_se_int,
             inc2_se_int,
             inc3_se_int,
             white_se_int,
             nonwhite_se_int,
             metro1_se_int,
             metro0_se_int,
             landline_se_int,
             cell_se_int
)

ests <- c(dem_all$coefficients[2],
          dem_men$coefficients[2],
          dem_women$coefficients[2],
          dem_age1$coefficients[2],
          dem_age2$coefficients[2],
          dem_age3$coefficients[2],
          dem_dems$coefficients[2],
          dem_ind$coefficients[2],
          dem_rep$coefficients[2],
          dem_nohs$coefficients[2],
          dem_somecoll$coefficients[2],
          dem_ba$coefficients[2],
          dem_inc1$coefficients[2],
          dem_inc2$coefficients[2],
          dem_inc3$coefficients[2],
          dem_white$coefficients[2],
          dem_nonwhite$coefficients[2],
          dem_metro1$coefficients[2],
          dem_metro0$coefficients[2],
          dem_landline$coefficients[2],
          dem_cell$coefficients[2]
)

se.ests <- c(all_se_slope,
             men_se_slope,
             women_se_slope,
             age1_se_slope,
             age2_se_slope,
             age3_se_slope,
             dems_se_slope,
             ind_se_slope,
             rep_se_slope,
             nohs_se_slope,
             somecoll_se_slope,
             ba_se_slope,
             inc1_se_slope,
             inc2_se_slope,
             inc3_se_slope,
             white_se_slope,
             nonwhite_se_slope,
             metro1_se_slope,
             metro0_se_slope,
             landline_se_slope,
             cell_se_slope
)

ci.l <- c(dem_all$ci[2,1],
          dem_men$ci[2,1],
          dem_women$ci[2,1],
          dem_age1$ci[2,1],
          dem_age2$ci[2,1],
          dem_age3$ci[2,1],
          dem_dems$ci[2,1],
          dem_ind$ci[2,1],
          dem_rep$ci[2,1],
          dem_nohs$ci[2,1],
          dem_somecoll$ci[2,1],
          dem_ba$ci[2,1],
          dem_inc1$ci[2,1],
          dem_inc2$ci[2,1],
          dem_inc3$ci[2,1],
          dem_white$ci[2,1],
          dem_nonwhite$ci[2,1],
          dem_metro1$ci[2,1],
          dem_metro0$ci[2,1],
          dem_landline$ci[2,1],
          dem_cell$ci[2,1]
)

ci.h <- c(dem_all$ci[2,2],
          dem_men$ci[2,2],
          dem_women$ci[2,2],
          dem_age1$ci[2,2],
          dem_age2$ci[2,2],
          dem_age3$ci[2,2],
          dem_dems$ci[2,2],
          dem_ind$ci[2,2],
          dem_rep$ci[2,2],
          dem_nohs$ci[2,2],
          dem_somecoll$ci[2,2],
          dem_ba$ci[2,2],
          dem_inc1$ci[2,2],
          dem_inc2$ci[2,2],
          dem_inc3$ci[2,2],
          dem_white$ci[2,2],
          dem_nonwhite$ci[2,2],
          dem_metro1$ci[2,2],
          dem_metro0$ci[2,2],
          dem_landline$ci[2,2],
          dem_cell$ci[2,2]
)

n <- c(dem_all$n,
       dem_men$n,
       dem_women$n,
       dem_age1$n,
       dem_age2$n,
       dem_age3$n,
       dem_dems$n,
       dem_ind$n,
       dem_rep$n,
       dem_nohs$n,
       dem_somecoll$n,
       dem_ba$n,
       dem_inc1$n,
       dem_inc2$n,
       dem_inc3$n,
       dem_white$n,
       dem_nonwhite$n,
       dem_metro1$n,
       dem_metro0$n,
       dem_landline$n,
       dem_cell$n
)

tab <- tibble(subgroup = subgroup, ints = ints, se.ints = se.ints,
              ests = ests, se.ests = se.ests, n = n)

##### PLOTTING COEFFICIENT ESTIMATES #####
# Calculating Benjamini-Hochman CIs
# for slopes
slope_dat <- tab %>% 
      mutate(effect.size = abs( (ests-1)/se.ests ),
             rank = rank(-effect.size),
             alpha.hat = rank * .05/21,
             t.critical.fdr = qt(1 - (alpha.hat/2), 21),
             lower.fdr = ests - t.critical.fdr*se.ests,
             upper.fdr = ests + t.critical.fdr*se.ests) 


# for intercepts
intercept_dat <- tab %>%
      mutate(effect.size = abs( (ints)/se.ints ),
             rank = rank(-effect.size),
             alpha.hat = rank * .05/21,
             t.critical.fdr = qt(1 - (alpha.hat/2), 21),
             lower.fdr = ints - t.critical.fdr*se.ints,
             upper.fdr = ints + t.critical.fdr*se.ints) 



### Plotting slope coefficients
sg.ord <- 21:1

### MAKING FIGURE WITH DEFAULT AXES
sl <- slope_dat %>% 
      ggplot(aes(reorder(subgroup, sg.ord), ests)) + 
      geom_pointrange(aes(ymin = lower.fdr, ymax = upper.fdr)) + 
      coord_flip(ylim = c(0, 2.5)) + 
      geom_hline(yintercept = 1, linetype = 2) + 
      labs(y = "Slope Estimates & 95% CIs", x = "Subgroup") +
      theme_bw() + 
      theme(text=element_text(size=16))


# Plotting intercepts
int <- intercept_dat %>% 
      ggplot(aes(reorder(subgroup, sg.ord), ints)) + 
      geom_pointrange(aes(ymin = lower.fdr, ymax = upper.fdr)) + 
      #scale_y_continuous(breaks = c(-.1, -.05, 0, .05, .1)) +
      coord_flip(ylim = c(-.5, .3)) + 
      geom_hline(yintercept = 0, linetype = 2) + 
      labs(y = "Intercept Estimates & 95% CIs", x = "") +
      theme_bw() + 
      theme(text=element_text(size=16),
            axis.text.y = element_blank())

# arrange plots
ggpubr::ggarrange(sl, int, widths = c(1.25,1))

# save
ggsave("../../../results/Figure_G2.pdf")

##### TABLE G4 #####

write_file(print(xtable(tab, "Coefficient Estimates and Bootstrapped Standard Errors 
       from Deming Regressions of Eager ATE on Reluctant ATE", 
             label = "app:deming_tab_PS_only",
             digits = 3), include.rownames = F), 
           file = "../../../results/Table_G4.tex")

